{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[2 9 0 1]\n"
     ]
    }
   ],
   "source": [
    "# Build a Series from a plain Python list, then show its underlying array of values\n",
    "series_1 = pd.Series(data=[2, 9, 0, 1])\n",
    "print(series_1.values)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "RangeIndex(start=0, stop=4, step=1)"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "series_1.index                # Default index of the series object"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "series_1.index = ['a','b','c','d']                 # Setting index of the series object"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "series_1['d']                # Fetching element using new index"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Build a small class roster as a DataFrame: one row per student label,\n",
    "# one column per key of the dictionary\n",
    "class_data = {\n",
    "    'Names': ['John', 'Ryan', 'Emily'],\n",
    "    'Standard': [7, 5, 8],\n",
    "    'Subject': ['English', 'Mathematics', 'Science'],\n",
    "}\n",
    "\n",
    "class_df = pd.DataFrame(\n",
    "    data=class_data,\n",
    "    index=['Student1', 'Student2', 'Student3'],\n",
    "    columns=['Names', 'Standard', 'Subject'],\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "          Names  Standard      Subject\n",
      "Student1   John         7      English\n",
      "Student2   Ryan         5  Mathematics\n",
      "Student3  Emily         8      Science\n"
     ]
    }
   ],
   "source": [
    "print(class_df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Student1     John\n",
       "Student2     Ryan\n",
       "Student3    Emily\n",
       "Name: Names, dtype: object"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "class_df.Names"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Add a new row to the dataframe. Assigning to a label that does not exist yet\n",
    "# through .loc enlarges the frame (the old .ix indexer was deprecated in\n",
    "# pandas 0.20 and removed in pandas 1.0, so it no longer works).\n",
    "import numpy as np\n",
    "class_df.loc['Student4'] = ['Robin', np.nan, 'History']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Student1</th>\n",
       "      <th>Student2</th>\n",
       "      <th>Student3</th>\n",
       "      <th>Student4</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>Names</th>\n",
       "      <td>John</td>\n",
       "      <td>Ryan</td>\n",
       "      <td>Emily</td>\n",
       "      <td>Robin</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Standard</th>\n",
       "      <td>7</td>\n",
       "      <td>5</td>\n",
       "      <td>8</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Subject</th>\n",
       "      <td>English</td>\n",
       "      <td>Mathematics</td>\n",
       "      <td>Science</td>\n",
       "      <td>History</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "         Student1     Student2 Student3 Student4\n",
       "Names        John         Ryan    Emily    Robin\n",
       "Standard        7            5        8      NaN\n",
       "Subject   English  Mathematics  Science  History"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "class_df.T                # Take transpose of the dataframe"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Names</th>\n",
       "      <th>Standard</th>\n",
       "      <th>Subject</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>Student2</th>\n",
       "      <td>Ryan</td>\n",
       "      <td>5.0</td>\n",
       "      <td>Mathematics</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Student1</th>\n",
       "      <td>John</td>\n",
       "      <td>7.0</td>\n",
       "      <td>English</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Student3</th>\n",
       "      <td>Emily</td>\n",
       "      <td>8.0</td>\n",
       "      <td>Science</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Student4</th>\n",
       "      <td>Robin</td>\n",
       "      <td>NaN</td>\n",
       "      <td>History</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          Names  Standard      Subject\n",
       "Student2   Ryan       5.0  Mathematics\n",
       "Student1   John       7.0      English\n",
       "Student3  Emily       8.0      Science\n",
       "Student4  Robin       NaN      History"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "class_df.sort_values(by='Standard')   # Sorting of rows by one column"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Append a 'Grade' column from a Series keyed by the student labels\n",
    "col_entry = pd.Series(\n",
    "    data=['A', 'B', 'A+', 'C'],\n",
    "    index=['Student1', 'Student2', 'Student3', 'Student4'],\n",
    ")\n",
    "class_df['Grade'] = col_entry"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "          Names  Standard      Subject Grade\n",
      "Student1   John       7.0      English     A\n",
      "Student2   Ryan       5.0  Mathematics     B\n",
      "Student3  Emily       8.0      Science    A+\n",
      "Student4  Robin       NaN      History     C\n"
     ]
    }
   ],
   "source": [
    "print(class_df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "          Names  Standard      Subject Grade\n",
      "Student1   John       7.0      English     A\n",
      "Student2   Ryan       5.0  Mathematics     B\n",
      "Student3  Emily       8.0      Science    A+\n",
      "Student4  Robin      10.0      History     C\n"
     ]
    }
   ],
   "source": [
    "# Filling the missing entries in the dataframe, inplace\n",
    "class_df.fillna(10, inplace=True)\n",
    "print(class_df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Concatenation of 2 dataframes: first build the second frame (student ages),\n",
    "# indexed by student label\n",
    "student_age = pd.DataFrame(\n",
    "    {'Age': [13, 10, 15, 18]},\n",
    "    index=['Student1', 'Student2', 'Student3', 'Student4'],\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "          Age\n",
      "Student1   13\n",
      "Student2   10\n",
      "Student3   15\n",
      "Student4   18\n"
     ]
    }
   ],
   "source": [
    "print(student_age)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "          Names  Standard      Subject Grade  Age\n",
      "Student1   John       7.0      English     A   13\n",
      "Student2   Ryan       5.0  Mathematics     B   10\n",
      "Student3  Emily       8.0      Science    A+   15\n",
      "Student4  Robin      10.0      History     C   18\n"
     ]
    }
   ],
   "source": [
    "# Join the two frames side by side (column-wise) into a single frame\n",
    "class_data = pd.concat([class_df, student_age], axis='columns')\n",
    "print(class_data)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Student1        EnglishSub\n",
       "Student2    MathematicsSub\n",
       "Student3        ScienceSub\n",
       "Student4        HistorySub\n",
       "Name: Subject, dtype: object"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# MAP Function\n",
    "class_data['Subject'] = class_data['Subject'].map(lambda x : x + 'Sub')\n",
    "class_data['Subject']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "-----Old values-----\n",
      "Student1    13\n",
      "Student2    10\n",
      "Student3    15\n",
      "Student4    18\n",
      "Name: Age, dtype: int64\n",
      "-----New values-----\n",
      "Student1    14\n",
      "Student2    11\n",
      "Student3    16\n",
      "Student4    19\n",
      "Name: Age, dtype: int64\n"
     ]
    }
   ],
   "source": [
    "# APPLY Function\n",
    "def age_add(x):\n",
    "    \"\"\"Return x incremented by 1 (used below to bump each age by one year).\"\"\"\n",
    "    return x + 1\n",
    "\n",
    "print('-----Old values-----')\n",
    "print(class_data['Age'])\n",
    "print('-----New values-----')\n",
    "print(class_data['Age'].apply(age_add))          # Applying the age function on top of the age column"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "category"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Changing datatype of the column\n",
    "class_data['Grade'] = class_data['Grade'].astype('category')\n",
    "class_data.Grade.dtypes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Storing the results \n",
    "class_data.to_csv('class_dataset.csv', index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
